import scrapy
from ..items import WeatherItem
class TianqiSpider(scrapy.Spider):
    """Spider that collects image URLs starting from weather.com.cn.

    For every response it emits one ``WeatherItem`` per ``<img src>`` found
    and schedules the page's ``<a href>`` links to be parsed the same way.
    Both activities are capped by ``max_images`` and ``max_pages``; the
    counters are kept on the spider so the caps apply to the whole crawl,
    not to each page separately.
    """

    name = "tianqi"
    # NOTE: domain filtering is disabled, so off-site links are followed too.
    # allowed_domains = ["www.xxx.com"]
    start_urls = ["http://www.weather.com.cn/"]

    # Crawl budgets; override on a subclass or instance to change the caps.
    max_pages = 24    # maximum number of follow-up page requests scheduled
    max_images = 124  # maximum number of image items emitted

    # Running totals for the whole crawl.
    pagenum = 0
    imagenum = 0

    def parse(self, response):
        """Yield image items and follow-up requests for one response.

        Args:
            response: The downloaded page; must provide ``xpath`` and
                ``urljoin``.

        Yields:
            ``WeatherItem`` objects whose ``url`` is the absolute image URL
            and whose ``num`` is the zero-based running image index, followed
            by ``scrapy.Request`` objects (callback ``self.parse``) for the
            links found on the page.
        """
        hrefs = response.xpath(".//a/@href").extract()      # every link target on the page
        img_srcs = response.xpath(".//img/@src").extract()  # every image source on the page

        for src in img_srcs:
            if self.imagenum >= self.max_images:
                break  # image budget exhausted; nothing further can be emitted
            try:
                # Resolve relative / protocol-relative sources against the
                # page URL; absolute URLs come back unchanged.
                img_url = response.urljoin(src)
            except ValueError:
                continue  # malformed src attribute, skip it
            print(img_url)
            item = WeatherItem()
            item['url'] = img_url
            item['num'] = self.imagenum
            self.imagenum += 1
            yield item

        for href in hrefs:
            # ">=" rather than "==": the cap must still hold if the counter
            # ever steps past the limit.
            if self.pagenum >= self.max_pages:
                break
            try:
                # Request() rejects URLs without a scheme, so relative links
                # must be resolved first. Links that still are not fetchable
                # (e.g. "javascript:...") raise ValueError and are skipped.
                request = scrapy.Request(response.urljoin(href), callback=self.parse)
            except ValueError:
                continue
            # Count before yielding so that interleaved parse() generators
            # cannot both pass the limit check for the same slot.
            self.pagenum += 1
            yield request
